import java.util.concurrent.ThreadLocalRandom;
import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.VectorOperators;

// Compile and run with (needed for both javac and java): --add-modules jdk.incubator.vector
/**
 * Micro-benchmark that times four formulations of the same computation over {@link #a}.
 *
 * <p>The array is read as consecutive groups of four doubles. For every pair of adjacent groups
 * the kernels take the element-wise difference, compute the square root of the sum of its
 * squares, and add that to a running total {@code r}.
 *
 * <ul>
 *   <li>{@link #test0} - scalar, sum of squares with plain multiply/add</li>
 *   <li>{@link #test1} - scalar, sum of squares with nested {@link Math#fma}</li>
 *   <li>{@link #test2} - like {@code test1}, but two groups per loop iteration</li>
 *   <li>{@link #test3} - Vector API with the 256-bit {@code double} species</li>
 * </ul>
 *
 * <p>The kernels are intentionally kept as separate, hand-written loops: their code shape is what
 * is being compared. Their totals may differ in the last bits, because fused multiply-add and the
 * vector lane reduction do not necessarily round the same way as the plain scalar expression.
 *
 * <p>Uses the incubating Vector API, so {@code --add-modules jdk.incubator.vector} is needed for
 * both {@code javac} and {@code java}.
 */
public class TestVector {
	/** Number of timed rounds; each round runs every kernel and prints one line per kernel. */
	private static final int ROUNDS = 5;

	/** Number of back-to-back passes over {@link #a} that make up one timed measurement. */
	private static final int PASSES = 1000;

	/** Input data, filled with random values by {@link #main} before any kernel is timed. */
	static final double[] a = new double[1_000_004];

	/**
	 * Scalar baseline: sum of squares written as plain multiplications and additions.
	 *
	 * @param r running total to continue from
	 * @return {@code r} plus the distance between every pair of adjacent groups of four in {@link #a}
	 */
	static double test0(double r) {
		// v0..v3 always hold the previous group; the first group seeds them.
		double v0 = a[0], v1 = a[1], v2 = a[2], v3 = a[3];
		for (int i = 4; i + 4 <= a.length; i += 4) {
			double v4 = a[i], v5 = a[i + 1], v6 = a[i + 2], v7 = a[i + 3];
			// Turn the previous group into the difference (previous - current).
			v0 -= v4;
			v1 -= v5;
			v2 -= v6;
			v3 -= v7;
			r += Math.sqrt(v0 * v0 + v1 * v1 + v2 * v2 + v3 * v3);
			// The current group becomes the previous one for the next iteration.
			v0 = v4;
			v1 = v5;
			v2 = v6;
			v3 = v7;
		}
		return r;
	}

	/**
	 * Same loop as {@link #test0}, but the sum of squares is built from nested {@link Math#fma}
	 * calls instead of separate multiplications and additions.
	 *
	 * @param r running total to continue from
	 * @return {@code r} plus the distance between every pair of adjacent groups of four in {@link #a}
	 */
	static double test1(double r) {
		double v0 = a[0], v1 = a[1], v2 = a[2], v3 = a[3];
		for (int i = 4; i + 4 <= a.length; i += 4) {
			double v4 = a[i], v5 = a[i + 1], v6 = a[i + 2], v7 = a[i + 3];
			v0 -= v4;
			v1 -= v5;
			v2 -= v6;
			v3 -= v7;
			r += Math.sqrt(Math.fma(v0, v0, Math.fma(v1, v1, Math.fma(v2, v2, v3 * v3))));
			v0 = v4;
			v1 = v5;
			v2 = v6;
			v3 = v7;
		}
		return r;
	}

	/**
	 * {@link #test1} unrolled by two: each loop iteration loads two groups of four and adds two
	 * distances to the total.
	 *
	 * <p>If the number of groups after the first one is odd, the group left over after the
	 * unrolled loop is handled by a single tail step, so the result covers exactly the same
	 * elements as {@link #test0} and {@link #test1} for any array length.
	 *
	 * @param r running total to continue from
	 * @return {@code r} plus the distance between every pair of adjacent groups of four in {@link #a}
	 */
	static double test2(double r) {
		double v0 = a[0], v1 = a[1], v2 = a[2], v3 = a[3];
		int i = 4;
		for (; i + 8 <= a.length; i += 8) {
			double v4 = a[i], v5 = a[i + 1], v6 = a[i + 2], v7 = a[i + 3];
			double va = a[i + 4], vb = a[i + 5], vc = a[i + 6], vd = a[i + 7];
			// previous - first group
			v0 -= v4;
			v1 -= v5;
			v2 -= v6;
			v3 -= v7;
			// first group - second group
			v4 -= va;
			v5 -= vb;
			v6 -= vc;
			v7 -= vd;
			r += Math.sqrt(Math.fma(v0, v0, Math.fma(v1, v1, Math.fma(v2, v2, v3 * v3))));
			r += Math.sqrt(Math.fma(v4, v4, Math.fma(v5, v5, Math.fma(v6, v6, v7 * v7))));
			// The second group becomes the previous one for the next iteration.
			v0 = va;
			v1 = vb;
			v2 = vc;
			v3 = vd;
		}
		// Tail: one group of four may remain that the 8-wide loop could not pair up.
		// With the current array length this branch is never taken.
		if (i + 4 <= a.length) {
			double v4 = a[i], v5 = a[i + 1], v6 = a[i + 2], v7 = a[i + 3];
			v0 -= v4;
			v1 -= v5;
			v2 -= v6;
			v3 -= v7;
			r += Math.sqrt(Math.fma(v0, v0, Math.fma(v1, v1, Math.fma(v2, v2, v3 * v3))));
		}
		return r;
	}

	/**
	 * Vector API version: every group of four doubles is loaded as one 256-bit vector, the
	 * difference is squared lane-wise and the lanes are summed with an {@code ADD} reduction.
	 *
	 * @param r running total to continue from
	 * @return {@code r} plus the distance between every pair of adjacent groups of four in {@link #a}
	 */
	static double test3(double r) {
		var va = DoubleVector.fromArray(DoubleVector.SPECIES_256, a, 0);
		// The stride of 4 matches the four double lanes of the 256-bit species.
		for (int i = 4; i + 4 <= a.length; i += 4) {
			var vb = DoubleVector.fromArray(DoubleVector.SPECIES_256, a, i);
			va = va.sub(vb);
			va = va.mul(va);
			r += Math.sqrt(va.reduceLanes(VectorOperators.ADD));
			va = vb;
		}
		return r;
	}

	/**
	 * Fills {@link #a} with random doubles, then runs {@link #ROUNDS} rounds. In each round every
	 * kernel is executed {@link #PASSES} times in a row, feeding its result back in as the next
	 * starting total, and its final total and elapsed time are printed.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		var rand = ThreadLocalRandom.current();
		for (int i = 0; i < a.length; i++)
			a[i] = rand.nextDouble();

		for (int round = 0; round < ROUNDS; round++) {
			long start = System.nanoTime();
			double r = 0.0;
			for (int pass = 0; pass < PASSES; pass++)
				r = test0(r);
			report(0, r, System.nanoTime() - start);

			start = System.nanoTime();
			r = 0.0;
			for (int pass = 0; pass < PASSES; pass++)
				r = test1(r);
			report(1, r, System.nanoTime() - start);

			start = System.nanoTime();
			r = 0.0;
			for (int pass = 0; pass < PASSES; pass++)
				r = test2(r);
			report(2, r, System.nanoTime() - start);

			start = System.nanoTime();
			r = 0.0;
			for (int pass = 0; pass < PASSES; pass++)
				r = test3(r);
			report(3, r, System.nanoTime() - start);

			System.out.println("---");
		}
	}

	/** Prints one result line: kernel number, accumulated total and elapsed whole milliseconds. */
	private static void report(int variant, double r, long elapsedNanos) {
		System.out.println("r" + variant + " = " + r + ", " + elapsedNanos / 1_000_000 + " ms");
	}
}
